In [1]:
import pandas as pd
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt;
In [2]:
# Set the default figure size to 20 x 10 for every plot drawn after this cell.
plt.rc('figure', figsize=(20, 10))
In [3]:
# CSV export of dataset 65db-xm6k from the data.seattle.gov open-data portal.
url = 'https://data.seattle.gov/api/views/65db-xm6k/rows.csv?accessType=DOWNLOAD'
# `parse_dates=True` only asks pandas to parse the *index*; with no `index_col`
# that is a no-op and `Date` stayed as plain strings. Naming the column makes
# the parse actually happen. `Date` is kept as a regular column so the later
# cells that read `df.Date` keep working unchanged.
df = pd.read_csv(url, parse_dates=['Date'])
In [4]:
# Peek at the first five rows of the loaded frame.
df.head(n=5)
Out[4]:
In [5]:
# (number of rows, number of columns) of the frame.
df.shape
Out[5]:
In [6]:
# Index the frame by timestamp: build a DatetimeIndex from the `Date` column.
# The column itself is left in place by this statement.
df.index = pd.DatetimeIndex(df['Date'])
In [7]:
# Show the first five rows again to inspect the new index.
df.head(n=5)
Out[7]:
In [8]:
# Remove the `Date` column from the frame.
# Rebinding with `errors='ignore'` (instead of an in-place drop) keeps this
# cell re-runnable: executing it a second time no longer raises KeyError
# because the column is already gone.
df = df.drop(columns=['Date'], errors='ignore')
df.head()
Out[8]:
In [ ]:
# Collapse the two per-sidewalk count columns into a single `total` column,
# then discard the originals.
# NOTE(review): the column names are hard-coded; confirm they still match the
# header of the downloaded CSV.
sidewalk_columns = ['Fremont Bridge East Sidewalk', 'Fremont Bridge West Sidewalk']
east_column, west_column = sidewalk_columns
df['total'] = df[east_column].add(df[west_column])
df.drop(columns=sidewalk_columns, inplace=True)
In [ ]:
# Sum the counts per calendar day and plot the result.
daily_totals = df.resample('D').sum()
daily_totals.plot()
Out[ ]:
In [ ]:
# Sum the counts per calendar month and plot the result.
# NOTE(review): pandas >= 2.2 deprecates the 'M' alias in favour of 'ME'; it is
# kept as-is here so the cell still runs on older pandas versions.
monthly_totals = df.resample('M').sum()
monthly_totals.plot()
Out[ ]:
In [ ]:
# Average every column by the hour component of the index and plot the
# resulting per-hour profile.
hour_of_day = df.index.hour
df.groupby(hour_of_day).mean().plot()
Out[ ]:
In [ ]:
# Reshape `total` into an hour-by-date table: one row per hour of the index,
# one column per calendar date. Cells are aggregated with pivot_table's
# default aggregation function.
pivoted_data = df.pivot_table(
    values='total',
    index=df.index.hour,
    columns=df.index.date,
)
# Preview the top-left 5 x 5 corner of the table.
pivoted_data.iloc[:5, :5]
Out[ ]:
In [ ]:
# Plot all of the dates together on a single axes, one line per column of
# `pivoted_data`. The legend is suppressed and each line is drawn nearly
# transparent so the overlapping lines remain readable.
pivoted_data.plot(alpha=0.1, legend=False)
In [ ]:
from sklearn.decomposition import PCA
# Build the matrix handed to PCA: missing entries become 0 and the table is
# transposed, so each row of X corresponds to one column of `pivoted_data`.
X = pivoted_data.fillna(value=0).transpose().values
X.shape
In [ ]:
# Fit a two-component PCA (full SVD solver) on X, then project X onto those
# two components.
pca = PCA(n_components=2, svd_solver='full')
pca.fit(X)
X_PCA = pca.transform(X)
In [ ]:
# Shape of the projected data returned by `pca.transform`.
X_PCA.shape
In [ ]:
# Scatter the first PCA coordinate against the second.
first_component, second_component = X_PCA[:, 0], X_PCA[:, 1]
plt.scatter(first_component, second_component)
In [ ]:
# Colour each projected point by the weekday of its date
# (pandas numbers weekdays Monday=0 ... Sunday=6).
column_dates = pd.DatetimeIndex(pivoted_data.columns)
dayofweek = column_dates.dayofweek
plt.scatter(x=X_PCA[:, 0], y=X_PCA[:, 1], c=dayofweek, cmap='rainbow')
plt.colorbar();
In [ ]:
from sklearn.mixture import GaussianMixture
In [ ]:
# Cluster the projected points with a two-component Gaussian mixture.
# The fit starts from a random initialisation, so without a fixed seed the
# component numbering (and occasionally the fit itself) can change from run to
# run. Pinning `random_state` makes the labels reproducible under
# Restart & Run All.
RANDOM_STATE = 0
gmm = GaussianMixture(n_components=2, random_state=RANDOM_STATE)
gmm.fit(X_PCA)
# Hard assignment: index of the most probable component for each row of X_PCA.
labels = gmm.predict(X_PCA)
labels
In [ ]:
# Scatter the two PCA coordinates, coloured by the mixture component that
# `gmm.predict` assigned to each point.
plt.scatter(x=X_PCA[:, 0], y=X_PCA[:, 1], c=labels, cmap='rainbow')